{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Interactive Machine Learning Demo"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from ipywidgets import interact, interactive, IntSlider, Layout, interact_manual\n",
    "import ipywidgets as widgets\n",
    "from IPython.display import display\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "#%matplotlib inline\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear Regression and Regularization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "N_samples = 25\n",
    "x_min = -5\n",
    "x_max = 5\n",
    "x1= np.linspace(x_min,x_max,N_samples*5)\n",
    "x= np.random.choice(x1,size=N_samples)\n",
    "noise_std=1\n",
    "noise_mean=0\n",
    "noise_magnitude = 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Function definitions (ideal fitting function and actual data generating function with noise)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def func_gen(N_samples,x_min,x_max,noise_magnitude,noise_sd,noise_mean):\n",
    "    x1= np.linspace(x_min,x_max,N_samples*5)\n",
    "    x= np.random.choice(x1,size=N_samples)\n",
    "    y=2*x-0.6*x**2+0.2*x**3+18*np.sin(x)\n",
    "    y1=2*x1-0.6*x1**2+0.2*x1**3+18*np.sin(x1)\n",
    "    y= y+noise_magnitude*np.random.normal(loc=noise_mean,scale=noise_sd,size=N_samples)\n",
    "    plt.figure(figsize=(8,5))\n",
    "    plt.plot(x1,y1,c='k',lw=2)\n",
    "    plt.scatter(x,y,edgecolors='k',c='yellow',s=60)\n",
    "    plt.grid(True)\n",
    "    plt.show()\n",
    "    return (x,y,x1,y1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Call the 'interactive' widget with the data generating function, which also plots the data real-time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2838efed54074b06bec67d01ad5bee7e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/html": [
       "<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
       "<p>\n",
       "  If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
       "  that the widgets JavaScript is still loading. If this message persists, it\n",
       "  likely means that the widgets JavaScript library is either not installed or\n",
       "  not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
       "  Widgets Documentation</a> for setup instructions.\n",
       "</p>\n",
       "<p>\n",
       "  If you're reading this message in another frontend (for example, a static\n",
       "  rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
       "  it may mean that your frontend doesn't currently support widgets.\n",
       "</p>\n"
      ],
      "text/plain": [
       "interactive(children=(Dropdown(description='N_samples', options={'Low (50 samples)': 50, 'High (200 samples)': 200}, value=50), IntSlider(value=-3, description='x_min', max=0, min=-5), IntSlider(value=2, description='x_max', max=5), IntSlider(value=2, description='noise_magnitude', max=5), FloatSlider(value=0.5, description='noise_sd', max=1.0, min=0.1), FloatSlider(value=0.0, description='noise_mean', max=2.0, min=-2.0, step=0.5), Output()), _dom_classes=('widget-interact',))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "p=interactive(func_gen,N_samples={'Low (50 samples)':50,'High (200 samples)':200},x_min=(-5,0,1), x_max=(0,5,1),\n",
    "              noise_magnitude=(0,5,1),noise_sd=(0.1,1,0.1),noise_mean=(-2,2,0.5))\n",
    "display(p)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Extract the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "x,y,x1,y1 = p.result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load scikit-learn libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import PolynomialFeatures\n",
    "from sklearn.linear_model import LassoCV\n",
    "from sklearn.linear_model import RidgeCV\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.pipeline import make_pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Machine learning (regression) model encapsulated within a function "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "lasso_eps = 0.01\n",
    "lasso_nalpha=20\n",
    "lasso_iter=3000\n",
    "ridge_alphas = (0.001,0.01,0.1,1)\n",
    "\n",
    "def func_fit(model_type,test_size,degree):\n",
    "    X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=test_size,random_state=55)\n",
    "    \n",
    "    t1=np.min(X_test)\n",
    "    t2=np.max(X_test)\n",
    "    t3=np.min(y_test)\n",
    "    t4=np.max(y_test)\n",
    "    \n",
    "    t5=np.min(X_train)\n",
    "    t6=np.max(X_train)\n",
    "    t7=np.min(y_train)\n",
    "    t8=np.max(y_train)\n",
    "    \n",
    "    posx_test=t1+(t2-t1)*0.7\n",
    "    posx_train=t5+(t6-t5)*0.7\n",
    "    posy_test=t3+(t4-t3)*0.2\n",
    "    posy_train=t7+(t8-t7)*0.2\n",
    "    \n",
    "    if (model_type=='Linear regression'):\n",
    "        model = make_pipeline(PolynomialFeatures(degree,interaction_only=False), \n",
    "                          LinearRegression(normalize=True))\n",
    "    if (model_type=='LASSO with CV'):    \n",
    "        model = make_pipeline(PolynomialFeatures(degree,interaction_only=False), \n",
    "                              LassoCV(eps=lasso_eps,n_alphas=lasso_nalpha,max_iter=lasso_iter,normalize=True,cv=5))\n",
    "        \n",
    "    if (model_type=='Ridge with CV'):    \n",
    "        model = make_pipeline(PolynomialFeatures(degree,interaction_only=False), \n",
    "                              RidgeCV(alphas=ridge_alphas,normalize=True,cv=5))\n",
    "    \n",
    "    X_train=X_train.reshape(-1,1)\n",
    "    X_test=X_test.reshape(-1,1)\n",
    "    \n",
    "    model.fit(X_train,y_train)\n",
    "    \n",
    "    train_pred = np.array(model.predict(X_train))\n",
    "    train_score = model.score(X_train,y_train)\n",
    "    \n",
    "    test_pred = np.array(model.predict(X_test))\n",
    "    test_score = model.score(X_test,y_test)\n",
    "    \n",
    "    RMSE_test=np.sqrt(np.mean(np.square(test_pred-y_test)))\n",
    "    RMSE_train=np.sqrt(np.mean(np.square(train_pred-y_train)))\n",
    "    \n",
    "    print(\"Test score: {}, Training score: {}\".format(test_score,train_score))\n",
    "    \n",
    "    print(\"RMSE Test: {}, RMSE train: {}\".format(RMSE_test,RMSE_train))\n",
    "    \n",
    "    plt.figure(figsize=(12,4))\n",
    "    \n",
    "    plt.subplot(1,2,1)\n",
    "    plt.title(\"Test set performance\\n\",fontsize=16)\n",
    "    plt.xlabel(\"X-test\",fontsize=13)\n",
    "    plt.ylabel(\"y-test\",fontsize=13)\n",
    "    plt.scatter(X_test,y_test,edgecolors='k',c='blue',s=60)\n",
    "    plt.scatter(X_test,test_pred,edgecolors='k',c='yellow',s=60)\n",
    "    plt.grid(True)\n",
    "    plt.legend(['Actual test values','Predicted values'])\n",
    "    plt.text(x=posx_test,y=posy_test,s='Test score: %.3f'%(test_score),fontsize=15)\n",
    "    \n",
    "    plt.subplot(1,2,2)\n",
    "    plt.title(\"Training set performance\\n\",fontsize=16)\n",
    "    plt.xlabel(\"X-train\",fontsize=13)\n",
    "    plt.ylabel(\"y-train\",fontsize=13)\n",
    "    plt.scatter(X_train,y_train,c='blue')\n",
    "    plt.scatter(X_train,train_pred,c='yellow')\n",
    "    plt.grid(True)\n",
    "    plt.legend(['Actual training values','Fitted values'])\n",
    "    plt.text(x=posx_train,y=posy_train,s='Training score: %.3f'%(train_score),fontsize=15)\n",
    "    \n",
    "    plt.show()\n",
    "       \n",
    "    return (train_score,test_score)    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Run the encapsulated ML function with ipywidget interactive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c07f081012e6401a8e5a47a104103310",
       "version_major": 2,
       "version_minor": 0
      },
      "text/html": [
       "<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
       "<p>\n",
       "  If you're reading this message in the Jupyter Notebook or JupyterLab Notebook, it may mean\n",
       "  that the widgets JavaScript is still loading. If this message persists, it\n",
       "  likely means that the widgets JavaScript library is either not installed or\n",
       "  not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
       "  Widgets Documentation</a> for setup instructions.\n",
       "</p>\n",
       "<p>\n",
       "  If you're reading this message in another frontend (for example, a static\n",
       "  rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
       "  it may mean that your frontend doesn't currently support widgets.\n",
       "</p>\n"
      ],
      "text/plain": [
       "interactive(children=(RadioButtons(description='Choose Model', layout=Layout(width='250px'), options=('Linear regression', 'LASSO with CV', 'Ridge with CV'), style=DescriptionStyle(description_width='initial'), value='Linear regression'), Dropdown(description='Test set size', options={'10% of data': 0.1, '20% of data': 0.2, '30% of data': 0.3, '40% of data': 0.4, '50% of data': 0.5}, style=DescriptionStyle(description_width='initial'), value=0.1), IntSlider(value=1, continuous_update=False, description='Polynomial degree', max=10, min=1), Output(layout=Layout(height='350px'))), _dom_classes=('widget-interact',))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "style = {'description_width': 'initial'}\n",
    "# Continuous_update = False for IntSlider control to stop continuous model evaluation while the slider is being dragged\n",
    "m = interactive(func_fit,model_type=widgets.RadioButtons(options=['Linear regression','LASSO with CV', 'Ridge with CV'],\n",
    "                                                    description = \"Choose Model\",style=style,\n",
    "                                                        layout=Layout(width='250px')),\n",
    "                test_size=widgets.Dropdown(options={\"10% of data\":0.1,\"20% of data\":0.2, \"30% of data\":0.3,\n",
    "                                                    \"40% of data\":0.4,\"50% of data\":0.5},\n",
    "                                          description=\"Test set size\",style=style),\n",
    "               degree=widgets.IntSlider(min=1,max=10,step=1,description= 'Polynomial degree',\n",
    "                                       stye=style,continuous_update=False))\n",
    "\n",
    "# Set the height of the control.children[-1] so that the output does not jump and flicker\n",
    "output = m.children[-1]\n",
    "output.layout.height = '350px'\n",
    "\n",
    "# Display the control\n",
    "display(m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
